options(java.parameters = "-Xmx128000m")
library(doParallel)
library(foreach)
library(tictoc)
library(glmnet)
library(caret)

source("generate_testing_data.R")

nfolds = 5
ntrials = 10000
cvtrials = 1000


PAV <- function (T, That, Y) {
  n=length(Y)
  n1=sum(T)
  n0=n-n1
  SAV=1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))
  Sf1=var((That*Y)[T==1])
  Sf0=var(((1-That)*Y)[T==0])
  varexp=Sf1/n1+Sf0/n0
  return(list(pav=SAV,sd=sqrt(max(varexp,0))))
}

PAPE <- function (T, That, Y) {
  n=length(Y)
  n1=sum(T)
  n0=n-n1
  n1h=sum(That)
  n0h=n-n1h
  probs=sum(That)/n
  SAPE=n/(n-1)*(1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-n1h/(n1*n)*sum(Y*T)-n0h/(n0*n)*sum(Y*(1-T)))
  Sf1=var(((That-probs)*Y)[T==1])
  Sf0=var(((That-probs)*Y)[T==0])
  SATE=1/n1*sum(T*Y)-1/n0*(sum((1-T)*Y))
  covarterm=1/n^2*(SAPE^2+2*(n-1)*SAPE*SATE*(2*probs-1)-(1-probs)*probs*n*SATE^2)
  varexp=n^2/(n-1)^2*(Sf1/n1+Sf0/n0+covarterm)
  return(list(pape=SAPE,sd=sqrt(max(varexp,0))))
}
PAPEp <- function (T, That, Y, plim) {
  n=length(Y)
  n1=sum(T)
  n0=n-n1
  n1h=sum(That)
  n0h=n-n1h
  SAPEfp=1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T))
  Sfp1=var(((That-plim)*Y)[T==1])
  Sfp0=var(((That-plim)*Y)[T==0])
  kf1=mean(Y[T==1 & That==1])-mean(Y[T==0 & That==1])
  kf0=mean(Y[T==1 & That==0])-mean(Y[T==0 & That==0])
  varfp=Sfp1/n1+Sfp0/n0+floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*((2*plim-1)*kf1^2-2*plim*kf1*kf0)
  return(list(papep=SAPEfp,sd=sqrt(max(varfp,0))))
}

PAPD <- function (T, Thatfp, Thatgp, Y, plim) {
  n=length(Y)
  n1=sum(T)
  n0=n-n1
  SAPEfp=1/n1*sum(T*Thatfp*Y)+1/n0*sum(Y*(1-T)*(1-Thatfp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T))
  SAPEgp=1/n1*sum(T*Thatgp*Y)+1/n0*sum(Y*(1-T)*(1-Thatgp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T))
  Sfp1=var(((Thatfp-plim)*Y)[T==1])
  Sfp0=var(((Thatfp-plim)*Y)[T==0])
  kf1=mean(Y[T==1 & Thatfp==1])-mean(Y[T==0 & Thatfp==1])
  kf0=mean(Y[T==1 & Thatfp==0])-mean(Y[T==0 & Thatfp==0])
  PAPD=SAPEfp-SAPEgp
  Sfgp1=var(((Thatfp-Thatgp)*Y)[T==1])
  Sfgp0=var(((Thatfp-Thatgp)*Y)[T==0])
  kg1=mean(Y[T==1 & Thatgp==1])-mean(Y[T==0 & Thatgp==1])
  kg0=mean(Y[T==1 & Thatgp==0])-mean(Y[T==0 & Thatgp==0])
  varfgp=Sfgp1/n1+Sfgp0/n0-floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*(kf1^2+kg1^2)+
    2*floor(n*plim)*max(floor(n*plim),n-floor(n*plim))/(n^2*(n-1))*abs(kf1*kg1)
  if (varfgp>0) {
    return(list(papd=PAPD,sd=sqrt(varfgp)))
  } else {
    return(list(papd=PAPD,sd=0))
  }
}

AUPEC <- function (T, tau, Y) {
  n=length(Y)
  n1=sum(T)
  n0=n-n1
  pf=sum(as.numeric(tau>0))/n
  if (pf>0) {
  Z=rbinom(1e4,n,pf)
  Z=Z[Z>0]
  ThatfA=numeric(n)
  kAf1=numeric(n)
  kAf0=numeric(n)
  kAf1A=numeric(n)
  kAf1B=numeric(n)
  kAf0A=numeric(n)
  kAf0B=numeric(n)
  covarsum=0
  for (i in 1:n) {
    cutofftemp=quantile(tau,1-i/n)
    Thatftemp=as.numeric(tau>cutofftemp)
    ThatftempA=as.numeric(tau>max(cutofftemp,0))
    ThatfA=ThatfA+1/n*ThatftempA
    cutofftemp2=quantile(tau,(i-1)/n)
    Thatftemp2=as.numeric(tau>cutofftemp2)
    tempkAf1=mean(Y[T==1 & Thatftemp2==1])-mean(Y[T==0 & Thatftemp2==1])
    tempkAf0=mean(Y[T==1 & Thatftemp==0])-mean(Y[T==0 & Thatftemp==0])
    if (is.nan(tempkAf1)) {
      kAf1[n-i+1]=kAf1[n-i+2]
    } else {
      kAf1[n-i+1]=tempkAf1
    }
    if (is.nan(tempkAf0)) {
      kAf0[i]=kAf0[i-1]
    } else {
      kAf0[i]=tempkAf0
    }
    kAf1A[n-i+1]=(n-i+1)*kAf1[n-i+1]
    kAf1B[n-i+1]=(i-1)*kAf1[n-i+1]
    kAf0A[i]=i*kAf0[i]
    kAf0B[i]=(n-i)*kAf0[i]
  }
  sumtemp1=cumsum(kAf1A*kAf0B)
  sumtemp2=cumsum(kAf1A)
  sumtemp3=cumsum(kAf1A*kAf1B)
  tempM=outer(kAf1A,kAf1B)
  tempM[lower.tri(tempM,diag=TRUE)] <- 0
  tempMsum=cumsum(colSums(tempM))
  covarsum1=mean(-1/(n^3*(n-1))*sumtemp1[Z]-Z*(n-Z)^2/(n^3*(n-1))*kAf1[Z]*kAf0[Z]-2/(n^4*(n-1))*tempMsum[Z]-Z^2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]^2
                 -2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]*sumtemp2[Z]+1/n^4*sumtemp3[Z])
  covarsum2=var(1/n*(sumtemp2[Z]/n+(n-Z)*Z/n*kAf1[Z]))
  ThatfA2=ThatfA-1/2
  SfA1=var((ThatfA2*Y)[T==1])
  SfA0=var((ThatfA2*Y)[T==0])
  varfA=SfA1/n1+SfA0/n0+covarsum1+covarsum2
  AUPEC=1/n1*sum(T*ThatfA*Y)+1/n0*sum(Y*(1-T)*(1-ThatfA))-0.5/n1*sum(T*Y)-0.5/n0*sum((1-T)*Y)
  return(list(aupec=AUPEC,sd=sqrt(max(varfA,0))))
  } else {
    AUPEC=1/n0*sum(Y*(1-T))-0.5/n1*sum(T*Y)-0.5/n0*sum((1-T)*Y)
    return(list(aupec=AUPEC,sd=0))
  }
}

PAVcv <- function(T, That, Y, ind) {
  nfolds = max(ind)
  n = length(Y)
  n1 = sum(T)
  n0 = n - n1
  pavfold = c()
  sdfold = c()
  Sf1 = 0
  Sf0 = 0  
  covijtauij = 0
  n1n1 = n1*(n1-1)
  n1n0 = n0*n1
  n0n0 = n0*(n0-1)
  ThatYT1mean = apply(That*Y*T,1,mean)
  ThatYT0mean = apply(That*Y*(1-T),1,mean)
  for (i in 1:nfolds) {
    output = PAV(T[ind==i],That[ind==i,i],Y[ind==i])
    m = length(T[ind==i])
    m1 = sum(T[ind==i])
    m0 = m - m1
    probs=sum(That[ind==i,i])/m
    Sf1=Sf1 + var((That[,i]*Y)[T==1 & ind==i])/(m1*nfolds)
    Sf0=Sf0 + var(((1-That[,i])*Y)[T==0 & ind==i])/(m0*nfolds)
    pavfold = c(pavfold, output$pav)
    sdfold = c(sdfold, output$sd)
    covijtauij = covijtauij + (((sum((That[,i]*Y*T))^2-sum((That[,i]*Y^2*T)))/n1n1 -  
                                                  2*sum((That[,i]*Y*T))*sum(That[,i]*Y*(1-T))/n1n0 +
                                                  (sum((That[,i]*Y*(1-T)))^2-sum((That[,i]*Y^2*(1-T))))/n0n0) -
                                                 ((sum(ThatYT1mean)^2-sum(ThatYT1mean^2))/n1n1 -  
                                                    (2*sum(ThatYT1mean)*sum(ThatYT0mean))/n1n0 +
                                                    (sum(ThatYT0mean)^2-sum(ThatYT0mean^2))/n0n0)) / nfolds
  }
  mF = n / nfolds
  SF2 = var(pavfold)
  varcv = Sf1+Sf0
  varexp = (varcv + covijtauij) - (nfolds - 1)/ nfolds * min(SF2,(varcv + covijtauij))
  return(list(pav=mean(pavfold),sd=sqrt(max(varexp,0))))
}

PAPEcv <- function(T, That, Y, ind) {
  nfolds = max(ind)
  n = length(Y)
  n1 = sum(T)
  n0 = n - n1
  papefold = c()
  pF = mean(That)
  tau = 1/n1*sum(T*Y)-1/n0*(sum((1-T)*Y))
  eitaui = sum(That*Y*T)/(n1*nfolds)-sum(That*Y*(1-T))/(n0*nfolds)
  Sf1 = 0
  Sf0 = 0  
  covij = 0
  covijtaui = 0
  covijtauij = 0
  n1n1 = n1*(n1-1)
  n1n0 = n0*n1
  n0n0 = n0*(n0-1)
  Thatmean = apply(That,1,mean)
  ThatYT1mean = apply(That*Y*T,1,mean)
  ThatYT0mean = apply(That*Y*(1-T),1,mean)
  for (i in 1:nfolds) {
    output = PAPE(T[ind==i],That[ind==i,i],Y[ind==i])
    m = length(T[ind==i])
    m1 = sum(T[ind==i])
    m0 = m - m1
    probs=sum(That[ind==i,i])/m
    Sf1=Sf1 + var(((That[,i]-probs)*Y)[T==1 & ind==i])/(m1*nfolds)
    Sf0=Sf0 + var(((That[,i]-probs)*Y)[T==0 & ind==i])/(m0*nfolds)
    papefold = c(papefold, output$pape)
    covij = covij + (m-2)*(m-3)/(m-1)^2*tau^2*((sum(That[,i])^2-sum(That[,i])-sum(Thatmean)^2+sum(Thatmean^2))/(n*(n-1))) / nfolds
    covijtaui = covijtaui + 2*(m-2)^2/(m-1)^2*tau*((sum(That[,i])-1)*(sum((That[,i]*Y*T))/((n-1)*n1)-sum((That[,i]*Y*(1-T)))/((n-1)*n0)) -
                                                     ((sum(Thatmean)*sum(ThatYT1mean)-sum(Thatmean*ThatYT1mean))/((n-1)*n1)-
                                                        (sum(Thatmean)*sum(ThatYT0mean)-sum(Thatmean*ThatYT0mean))/((n-1)*n0)))/ nfolds
    covijtauij = covijtauij + (m^2-2*m+2)/(m-1)^2*(((sum((That[,i]*Y*T))^2-sum((That[,i]*Y^2*T)))/n1n1 -  
                                                 2*sum((That[,i]*Y*T))*sum(That[,i]*Y*(1-T))/n1n0 +
                                                 (sum((That[,i]*Y*(1-T)))^2-sum((That[,i]*Y^2*(1-T))))/n0n0) -
                                                 ((sum(ThatYT1mean)^2-sum(ThatYT1mean^2))/n1n1 -  
                                                    (2*sum(ThatYT1mean)*sum(ThatYT0mean))/n1n0 +
                                                    (sum(ThatYT0mean)^2-sum(ThatYT0mean^2))/n0n0)) / nfolds
  }
  mF = n / nfolds
  SF2 = var(papefold)
  covarterm = 1/mF^2*(mean(papefold)^2+2*(mF-1)*mean(papefold)*tau*(2*pF-1)-(1-pF)*pF*mF*tau^2)
  varcv = mF^2/(mF-1)^2*(Sf1+Sf0+covarterm)
  varexp = varcv + covij - covijtaui + covijtauij - (nfolds - 1)/ nfolds * min(SF2, varcv + covij - covijtaui + covijtauij)
  return(list(pape=mean(papefold),sd=sqrt(max(varexp,0))))
}


PAPDcv <- function (T, Thatfp, Thatgp, Y, ind, plim) {
  nfolds = max(ind)
  n = length(Y)
  n1 = sum(T)
  n0 = n - n1
  papepfold = c()
  Sfgp1 = 0
  Sfgp0 = 0  
  kf1 = c()
  kf0 = c()
  kg1 = c()
  kg0 = c()
  for (i in 1:nfolds) {
    output = PAPD(T[ind==i],Thatfp[ind==i,i],Thatgp[ind==i,i], Y[ind==i],plim)
    m = length(T[ind==i])
    m1 = sum(T[ind==i])
    m0 = m - m1
    probs=sum(That[ind==i,i])/m
    Sfgp1=Sfgp1 + var(((Thatfp[,i]-Thatgp[,i])*Y)[T==1 & ind==i])/(m1*nfolds)
    Sfgp0=Sfgp0 + var(((Thatfp[,i]-Thatgp[,i])*Y)[T==0 & ind==i])/(m0*nfolds)
    tempf1 = mean(Y[T==1 & Thatfp[,i]==1 & ind ==i])-mean(Y[T==0 & Thatfp[,i]==1] & ind ==i)
    tempf0 = mean(Y[T==1 & Thatfp[,i]==0 & ind ==i])-mean(Y[T==0 & Thatfp[,i]==0] & ind ==i)
    tempg1 = mean(Y[T==1 & Thatgp[,i]==1 & ind ==i])-mean(Y[T==0 & Thatgp[,i]==1] & ind ==i)
    tempg0 = mean(Y[T==1 & Thatgp[,i]==0 & ind ==i])-mean(Y[T==0 & Thatgp[,i]==0] & ind ==i)
    if (!is.nan(tempf1)) {
      kf1 = c(kf1, tempf1)
    }
    if (!is.nan(tempf0)) {
      kf0 = c(kf0, tempf0)
    }  
    if (!is.nan(tempf1)) {
      kg1 = c(kg1, tempg1)
    }
    if (!is.nan(tempf0)) {
      kg0 = c(kg0, tempg0)
    } 
    papdfold = c(papdfold, output$papd)
  }
  kf1 = mean(kf1)
  kf0 = mean(kf0)
  kg1 = mean(kg1)
  kg0 = mean(kg0)
  mF = n / nfolds
  SF2 = var(papdfold)
  varfp=Sfgp1+Sfgp0-floor(mF*plim)*(mF-floor(mF*plim))/(mF^2*(mF-1))*(kf1^2+kg1^2)+
    2*floor(mF*plim)*max(floor(mF*plim),mF-floor(mF*plim))/(mF^2*(mF-1))*abs(kf1*kg1)
  vartotal = varfp - (nfolds - 1) / nfolds * min(varfp, SF2)
  return(list(papd=mean(papdfold),sd=sqrt(max(vartotal,0))))
}


PAPEpcv <- function(T, That, Y, ind, plim) {
  nfolds = max(ind)
  n = length(Y)
  n1 = sum(T)
  n0 = n - n1
  papepfold = c()
  Sfp1 = 0
  Sfp0 = 0  
  kf1 = c()
  kf0 = c()
  for (i in 1:nfolds) {
    output = PAPEp(T[ind==i],That[ind==i,i],Y[ind==i],plim)
    m = length(T[ind==i])
    m1 = sum(T[ind==i])
    m0 = m - m1
    probs=sum(That[ind==i,i])/m
    Sfp1=Sfp1 + var(((That[,i]-plim)*Y)[T==1 & ind==i])/(m1*nfolds)
    Sfp0=Sfp0 + var(((That[,i]-plim)*Y)[T==0 & ind==i])/(m0*nfolds)
    temp1 = mean(Y[T==1 & That[,i]==1 & ind==i])-mean(Y[T==0 & That[,i]==1 & ind==i])
    temp0 = mean(Y[T==1 & That[,i]==0 & ind==i])-mean(Y[T==0 & That[,i]==0 & ind==i])
    if (!is.nan(temp1)) {
      kf1 = c(kf1, temp1)
    }
    if (!is.nan(temp0)) {
      kf0 = c(kf0, temp0)
    }
    papepfold = c(papepfold, output$papep)
  }
  mF = n / nfolds
  SF2 = var(papepfold)
  kf1 = mean(kf1)
  kf0 = mean(kf0)
  varfp=Sfp1+Sfp0+floor(mF*plim)*(mF-floor(mF*plim))/(mF^2*(mF-1))*((2*plim-1)*kf1^2-2*plim*kf1*kf0)
  vartotal = varfp - (nfolds - 1) / nfolds * min(varfp, SF2)
  return(list(papep=mean(papepfold),sd=sqrt(max(vartotal,0))))
}

AUPECcv<- function (T, tau, Y, ind) {
  nfolds = max(ind)
  n = length(Y)
  n1 = sum(T)
  n0 = n - n1
  aupecfold = c()
  sdfold = c()
  for (i in 1:nfolds) {
    output = AUPEC(T[ind==i],tau[ind==i,i],Y[ind==i])
    aupecfold = c(aupecfold, output$aupec)
    sdfold = c(sdfold, output$sd)
  }
  SF2 = var(aupecfold)
  vartotal =mean(sdfold^2) / nfolds
  return(list(aupec=mean(aupecfold),sd=sqrt(max(vartotal,0))))  
}

AUPECcv2 <- function(T, tau, Y, ind) {
  nfolds = max(indcv)
  aupecfold = c()
  SfA1 = 0
  SfA0 = 0
  covarsum1 = 0
  covarsum2 = c()
  for (i in 1:nfolds) {
    Tind = T[ind==i]
    tauind = tau[ind==i,i]
    Yind = Y[ind==i]
    n=length(Yind)
    n1=sum(Tind)
    n0=n-n1
    pf=sum(as.numeric(tauind>0))/n
    if (pf>0) {
      Z=rbinom(1e4,n,pf)
      Z=Z[Z>0]
      ThatfA=numeric(n)
      kAf1=numeric(n)
      kAf0=numeric(n)
      kAf1A=numeric(n)
      kAf1B=numeric(n)
      kAf0A=numeric(n)
      kAf0B=numeric(n)
      covarsum=0
      for (i in 1:n) {
        cutofftemp=quantile(tauind,1-i/n)
        Thatftemp=as.numeric(tauind>cutofftemp)
        ThatftempA=as.numeric(tauind>max(cutofftemp,0))
        ThatfA=ThatfA+1/n*ThatftempA
        cutofftemp2=quantile(tauind,(i-1)/n)
        Thatftemp2=as.numeric(tauind>cutofftemp2)
        tempkAf1=mean(Yind[Tind==1 & Thatftemp2==1])-mean(Yind[Tind==0 & Thatftemp2==1])
        tempkAf0=mean(Yind[Tind==1 & Thatftemp==0])-mean(Yind[Tind==0 & Thatftemp==0])
        if (is.nan(tempkAf1)) {
          kAf1[n-i+1]=kAf1[n-i+2]
        } else {
          kAf1[n-i+1]=tempkAf1
        }
        if (is.nan(tempkAf0)) {
          kAf0[i]=kAf0[i-1]
        } else {
          kAf0[i]=tempkAf0
        }
        kAf1A[n-i+1]=(n-i+1)*kAf1[n-i+1]
        kAf1B[n-i+1]=(i-1)*kAf1[n-i+1]
        kAf0A[i]=i*kAf0[i]
        kAf0B[i]=(n-i)*kAf0[i]
      }
      sumtemp1=cumsum(kAf1A*kAf0B)
      sumtemp2=cumsum(kAf1A)
      sumtemp3=cumsum(kAf1A*kAf1B)
      tempM=outer(kAf1A,kAf1B)
      tempM[lower.tri(tempM,diag=TRUE)] <- 0
      tempMsum=cumsum(colSums(tempM))
      covarsum1 = covarsum1 + mean(-1/(n^3*(n-1))*sumtemp1[Z]-Z*(n-Z)^2/(n^3*(n-1))*kAf1[Z]*kAf0[Z]-2/(n^4*(n-1))*tempMsum[Z]-Z^2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]^2
                     -2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]*sumtemp2[Z]+1/n^4*sumtemp3[Z]) / nfolds
      covarsum2 = c(covarsum2, 1/n*(sumtemp2[Z[1]]/n+(n-Z[1])*Z[1]/n*kAf1[Z[1]]))
      ThatfA2=ThatfA-1/2
      SfA1=SfA1 + var((ThatfA2*Yind)[Tind==1]) / (n1 * nfolds)
      SfA0=SfA0 + var((ThatfA2*Yind)[Tind==0]) / (n0 * nfolds)
      aupecfold = c(aupecfold, 1/n1*sum(Tind*ThatfA*Yind)+1/n0*sum(Yind*(1-Tind)*(1-ThatfA))-0.5/n1*sum(Tind*Yind)-0.5/n0*sum((1-Tind)*Yind))
    } else {
      aupecfold = c(aupecfold, 1/n0*sum(Yind*(1-Tind))-0.5/n1*sum(Tind*Yind)-0.5/n0*sum((1-Tind)*Yind))
    }
  }
  SF2 = var(aupecfold)
  varexp = SfA1 + SfA0 + covarsum1 + var(covarsum2)
  vartotal = varexp - (nfolds - 1) / nfolds * min(varexp, SF2)
  return(list(aupec=mean(aupecfold),sd=sqrt(max(vartotal,0))))  
}


truePAVcvLASSO<-function(n, binind, nfolds) {
  PAPEavg = 0
  result = foreach(i=1:cvtrials, .combine = c) %dopar% {
    source("generate_testing_data.R")
    library(glmnet)
    train=testData(n*(nfolds-1)/nfolds,binind)
    y_train = train$y
    xz_train = train[, !names(train) %in% c("y")]  
    x_expand<-model.matrix(~.*z,data=xz_train)
    ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
    x = read.csv('Xt.csv',header=FALSE)
    x_full = x[,c(1,3,10,14,15,21,24,43)]
    x_full0 = cbind(x_full, z=0)
    x_expand0<-model.matrix(~.*z,data=x_full0)
    x_full1 = cbind(x_full, z=1)
    x_expand1<-model.matrix(~.*z,data=x_full1)
    yp0 = predict(ls1, x_expand0)
    yp1 = predict(ls1, x_expand1)
    That = as.numeric(yp1>yp0)  
    PAV<- truePAV(That,binind)   
    PAV
  }
  return(mean(result))
}

truePAPEcvLASSO<-function(n, binind, nfolds) {
  PAPEavg = 0
  result = foreach(i=1:cvtrials, .combine = c) %dopar% {
    source("generate_testing_data.R")
    library(glmnet)
    train=testData(n*(nfolds-1)/nfolds,binind)
    y_train = train$y
    xz_train = train[, !names(train) %in% c("y")]  
    x_expand<-model.matrix(~.*z,data=xz_train)
    ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
    x = read.csv('Xt.csv',header=FALSE)
    x_full = x[,c(1,3,10,14,15,21,24,43)]
    x_full0 = cbind(x_full, z=0)
    x_expand0<-model.matrix(~.*z,data=x_full0)
    x_full1 = cbind(x_full, z=1)
    x_expand1<-model.matrix(~.*z,data=x_full1)
    yp0 = predict(ls1, x_expand0)
    yp1 = predict(ls1, x_expand1)
    That = as.numeric(yp1>yp0)  
    PAPE<- truePAPE(That,binind)   
    PAPE
  }
  return(mean(result))
}

truePAPEpcvLASSO<-function(n, binind,nfolds, plim) {
  PAPEavg = 0
  result = foreach(i=1:cvtrials, .combine = c) %dopar% {
    source("generate_testing_data.R")
    library(glmnet)
    train=testData(n*(nfolds-1)/nfolds,binind)
    y_train = train$y
    xz_train = train[, !names(train) %in% c("y")]  
    x_expand<-model.matrix(~.*z,data=xz_train)
    ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
    x = read.csv('Xt.csv',header=FALSE)
    x_full = x[,c(1,3,10,14,15,21,24,43)]
    x_full0 = cbind(x_full, z=0)
    x_expand0<-model.matrix(~.*z,data=x_full0)
    x_full1 = cbind(x_full, z=1)
    x_expand1<-model.matrix(~.*z,data=x_full1)
    yp0 = predict(ls1, x_expand0)
    yp1 = predict(ls1, x_expand1)
    tau_full0 = yp1 - yp0 + runif(length(yp1),-1e-6,1e-6)
    That = as.numeric(tau_full0>quantile(tau_full0[folds[[t]]],1-plim))
    PAPE<- truePAPEp(That,binind,plim)   
    PAPE
  }
  return(mean(result))
}

trueAUPECcvLASSO<-function(n, binind,nfolds) {
  PAPEavg = 0
  result = foreach(i=1:cvtrials, .combine = c) %dopar% {
    source("generate_testing_data.R")
    library(glmnet)
    train=testData(n*(nfolds-1)/nfolds,binind)
    y_train = train$y
    xz_train = train[, !names(train) %in% c("y")]  
    x_expand<-model.matrix(~.*z,data=xz_train)
    ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
    x = read.csv('Xt.csv',header=FALSE)
    x_full = x[,c(1,3,10,14,15,21,24,43)]
    x_full0 = cbind(x_full, z=0)
    x_expand0<-model.matrix(~.*z,data=x_full0)
    x_full1 = cbind(x_full, z=1)
    x_expand1<-model.matrix(~.*z,data=x_full1)
    yp0 = predict(ls1, x_expand0)
    yp1 = predict(ls1, x_expand1)
    tau_full0 = yp1 - yp0 + runif(length(yp1),-1e-6,1e-6)
    AUPEC<- trueAUPEC(tau_full0,binind)   
    AUPEC
  }
  return(mean(result))
}

### PAV Experiments
no_cores <- detectCores() -1
cl <- makeCluster(no_cores)
registerDoParallel(cl)
m = 1
cvtrials = 10000
ntrials = 1000
final_output0=matrix(0, 6, 10)
for (i in 0:1) {
  magnitude = i
  nseq=c(100,500,2000)
  for (k in 1:length(nseq)) {
    PAPEavg<- truePAVcvLASSO(nseq[k],magnitude,nfolds)
    PAPEu = numeric(ntrials)
    PAPEl = numeric(ntrials)
    PAPEm = numeric(ntrials)
    sdm = numeric(ntrials)
    tic()
    expresult=foreach (l=1:ntrials) %dopar% {
      source("generate_testing_data.R")
      library(caret)
      test = testData(nseq[k],magnitude)
      folds <- createFolds(test$z,k=nfolds)
      n = nrow(test)
      indcv = numeric(n)
      Tcv = test$z
      Thatcv = matrix(0,nrow = n,ncol = nfolds)
      Ycv = test$y
      for(t in 1:nfolds){
        testData <- test[folds[[t]], ]
        trainData <- test[-folds[[t]], ]
        y_train = trainData$y
        xz_train = trainData[, !names(trainData) %in% c("y")]  
        x_expand<-model.matrix(~.*z,data=xz_train)
        ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
        
        y_test = test$y
        xz_test = test[, !names(testData) %in% c("z","y")]
        xz_test0 = cbind(xz_test, z=0)
        xz_test1 = cbind(xz_test, z=1)
        x_expand_test0<-model.matrix(~.*z,data=xz_test0)
        x_expand_test1<-model.matrix(~.*z,data=xz_test1)
        yp0 = predict(ls1, x_expand_test0)
        yp1 = predict(ls1, x_expand_test1)
        That = as.numeric(yp1>yp0) 
        indcv[folds[[t]]] = rep(t,nrow(testData))
        Thatcv[,t] = That
      }
      output <- PAVcv(Tcv, Thatcv, Ycv,indcv)
      pav = output$pav
      sdt = output$sd
      sdt
      return(list(pav+1.96*sdt, pav, pav-1.96*sdt, sdt))
    }
    toc()
    PAPEu=unlist(expresult)[c(TRUE,FALSE,FALSE,FALSE)]
    PAPEm=unlist(expresult)[c(FALSE,TRUE,FALSE,FALSE)]
    PAPEl=unlist(expresult)[c(FALSE,FALSE,TRUE,FALSE)]
    sdm=unlist(expresult)[c(FALSE,FALSE,FALSE,TRUE)]
    coverage=sum(as.numeric(PAPEavg<PAPEu & PAPEavg>PAPEl))/ntrials
    rmse = sqrt(mean((PAPEm-PAPEavg)*(PAPEm-PAPEavg)))
    mad = mean(abs(PAPEm-PAPEavg))
    asd=sd(PAPEm)
    rmsesd= sqrt(mean((sdm-asd)*(sdm-asd)))
    madsd = mean(abs(sdm-asd))
    final_output0[m,1]=magnitude*10
    final_output0[m,2]=nseq[k]
    final_output0[m,3]=coverage
    final_output0[m,4]=PAPEavg
    final_output0[m,5]=mean(PAPEm)-PAPEavg
    final_output0[m,6]=asd
    final_output0[m,7]=asd - mean(sdm)
    final_output0[m,8]=mad
    final_output0[m,9]=rmsesd
    final_output0[m,10]=madsd
    m = m+1
  }
}
stopCluster(cl)


### PAPE Experiments
no_cores <- detectCores() -1
cl <- makeCluster(no_cores)
registerDoParallel(cl)
m = 1
cvtrials = 10000
ntrials = 1000
final_output=matrix(0, 6, 10)
for (i in 0:1) {
  magnitude = i
  nseq=c(100,500,2000)
  for (k in 1:length(nseq)) {
    PAPEavg<- truePAPEcvLASSO(nseq[k],magnitude,nfolds)
    PAPEu = numeric(ntrials)
    PAPEl = numeric(ntrials)
    PAPEm = numeric(ntrials)
    sdm = numeric(ntrials)
    tic()
    expresult=foreach (l=1:ntrials) %dopar% {
      source("generate_testing_data.R")
      library(caret)
      test = testData(nseq[k],magnitude)
      folds <- createFolds(test$z,k=nfolds)
      n = nrow(test)
      indcv = numeric(n)
      Tcv = test$z
      Thatcv = matrix(0,nrow = n,ncol = nfolds)
      Ycv = test$y
      for(t in 1:nfolds){
        testData <- test[folds[[t]], ]
        trainData <- test[-folds[[t]], ]
        y_train = trainData$y
        xz_train = trainData[, !names(trainData) %in% c("y")]  
        x_expand<-model.matrix(~.*z,data=xz_train)
        ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
        
        y_test = test$y
        xz_test = test[, !names(testData) %in% c("z","y")]
        xz_test0 = cbind(xz_test, z=0)
        xz_test1 = cbind(xz_test, z=1)
        x_expand_test0<-model.matrix(~.*z,data=xz_test0)
        x_expand_test1<-model.matrix(~.*z,data=xz_test1)
        yp0 = predict(ls1, x_expand_test0)
        yp1 = predict(ls1, x_expand_test1)
        That = as.numeric(yp1>yp0) 
        indcv[folds[[t]]] = rep(t,nrow(testData))
        Thatcv[,t] = That
      }
      output <- PAPEcv(Tcv, Thatcv, Ycv,indcv)
      pape = output$pape
      sdt = output$sd
      sdt
      return(list(pape+1.96*sdt, pape, pape-1.96*sdt, sdt))
    }
    toc()
    PAPEu=unlist(expresult)[c(TRUE,FALSE,FALSE,FALSE)]
    PAPEm=unlist(expresult)[c(FALSE,TRUE,FALSE,FALSE)]
    PAPEl=unlist(expresult)[c(FALSE,FALSE,TRUE,FALSE)]
    sdm=unlist(expresult)[c(FALSE,FALSE,FALSE,TRUE)]
    coverage=sum(as.numeric(PAPEavg<PAPEu & PAPEavg>PAPEl))/ntrials
    rmse = sqrt(mean((PAPEm-PAPEavg)*(PAPEm-PAPEavg)))
    mad = mean(abs(PAPEm-PAPEavg))
    asd=sd(PAPEm)
    rmsesd= sqrt(mean((sdm-asd)*(sdm-asd)))
    madsd = mean(abs(sdm-asd))
    final_output[m,1]=magnitude*10
    final_output[m,2]=nseq[k]
    final_output[m,3]=coverage
    final_output[m,4]=PAPEavg
    final_output[m,5]=mean(PAPEm)-PAPEavg
    final_output[m,6]=asd
    final_output[m,7]=asd - mean(sdm)
    final_output[m,8]=mad
    final_output[m,9]=rmsesd
    final_output[m,10]=madsd
    m = m+1
  }
}
stopCluster(cl)


# PAPEp Experiments
no_cores <- detectCores() -1
cl <- makeCluster(no_cores)
registerDoParallel(cl)
m = 1
cvtrials = 10000
ntrials = 1000
plim = 0.2
final_output2 = matrix(0, 6, 10)
for (i in 0:1) {
  magnitude = i
  nseq=c(100,500,2000)
  for (k in 1:length(nseq)) {
    PAPEavg<- truePAPEpcvLASSO(nseq[k],magnitude,nfolds,plim)
    PAPEu = numeric(ntrials)
    PAPEl = numeric(ntrials)
    PAPEm = numeric(ntrials)
    sdm = numeric(ntrials)
    tic()
    expresult=foreach (l=1:ntrials) %dopar% {
      source("generate_testing_data.R")
      library(caret)
      test = testData(nseq[k],magnitude)
      folds <- createFolds(test$z,k=nfolds)
      n = nrow(test)
      indcv = numeric(n)
      Tcv = test$z
      Thatcv = matrix(0,nrow = n,ncol = nfolds)
      Ycv = test$y
      for(t in 1:nfolds){
        testData <- test[folds[[t]], ]
        trainData <- test[-folds[[t]], ]
        y_train = trainData$y
        xz_train = trainData[, !names(trainData) %in% c("y")]  
        x_expand<-model.matrix(~.*z,data=xz_train)
        ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
        
        y_test = test$y
        xz_test = test[, !names(testData) %in% c("z","y")]
        xz_test0 = cbind(xz_test, z=0)
        xz_test1 = cbind(xz_test, z=1)
        x_expand_test0<-model.matrix(~.*z,data=xz_test0)
        x_expand_test1<-model.matrix(~.*z,data=xz_test1)
        yp0 = predict(ls1, x_expand_test0)
        yp1 = predict(ls1, x_expand_test1)
        tau_full0 = yp1 - yp0 + runif(length(yp1),-1e-6,1e-6)
        That = as.numeric(tau_full0>quantile(tau_full0[folds[[t]]],1-plim))
        indcv[folds[[t]]] = rep(t,nrow(testData))
        Thatcv[,t] = That
      }
      output <- PAPEpcv(Tcv, Thatcv, Ycv,indcv,plim)
      pape = output$papep
      sdt = output$sd
      sdt
      return(list(pape+1.96*sdt, pape, pape-1.96*sdt, sdt))
    }
    toc()
    PAPEu=unlist(expresult)[c(TRUE,FALSE,FALSE,FALSE)]
    PAPEm=unlist(expresult)[c(FALSE,TRUE,FALSE,FALSE)]
    PAPEl=unlist(expresult)[c(FALSE,FALSE,TRUE,FALSE)]
    sdm=unlist(expresult)[c(FALSE,FALSE,FALSE,TRUE)]
    coverage=sum(as.numeric(PAPEavg<PAPEu & PAPEavg>PAPEl))/ntrials
    rmse = sqrt(mean((PAPEm-PAPEavg)*(PAPEm-PAPEavg)))
    mad = mean(abs(PAPEm-PAPEavg))
    asd=sd(PAPEm)
    rmsesd= sqrt(mean((sdm-asd)*(sdm-asd)))
    madsd = mean(abs(sdm-asd))
    final_output2[m,1]=magnitude*10
    final_output2[m,2]=nseq[k]
    final_output2[m,3]=coverage
    final_output2[m,4]=PAPEavg
    final_output2[m,5]=mean(PAPEm)-PAPEavg
    final_output2[m,6]=asd
    final_output2[m,7]=asd - mean(sdm)
    final_output2[m,8]=mad
    final_output2[m,9]=rmsesd
    final_output2[m,10]=madsd
    m = m+1
  }
}
stopCluster(cl)


# AUPEC Experiments
no_cores <- detectCores() -1
cl <- makeCluster(no_cores)
registerDoParallel(cl)
m = 1
cvtrials = 10000
ntrials = 1000

final_output3 = matrix(0, 6, 10)
for (i in 0:1) {
  magnitude = i
  nseq=c(100,500,2000)
  for (k in 1:length(nseq)) {
    tic()
    PAPEavg<- trueAUPECcvLASSO(nseq[k],magnitude,nfolds)
    toc()
    PAPEu = numeric(ntrials)
    PAPEl = numeric(ntrials)
    PAPEm = numeric(ntrials)
    sdm = numeric(ntrials)
    tic()
    expresult=foreach (l=1:ntrials) %dopar% {
      source("generate_testing_data.R")
      library(caret)
      test = testData(nseq[k],magnitude)
      folds <- createFolds(test$z,k=nfolds)
      n = nrow(test)
      indcv = numeric(n)
      Tcv = test$z
      taucv = matrix(0,nrow = n,ncol = nfolds)
      Ycv = test$y
      for(t in 1:nfolds){
        testData <- test[folds[[t]], ]
        trainData <- test[-folds[[t]], ]
        y_train = trainData$y
        xz_train = trainData[, !names(trainData) %in% c("y")]  
        x_expand<-model.matrix(~.*z,data=xz_train)
        ls1<-glmnet(x_expand,y_train,alpha=1,lambda=0.05)
        
        y_test = test$y
        xz_test = test[, !names(testData) %in% c("z","y")]
        xz_test0 = cbind(xz_test, z=0)
        xz_test1 = cbind(xz_test, z=1)
        x_expand_test0<-model.matrix(~.*z,data=xz_test0)
        x_expand_test1<-model.matrix(~.*z,data=xz_test1)
        yp0 = predict(ls1, x_expand_test0)
        yp1 = predict(ls1, x_expand_test1)
        tau = yp1 - yp0 + runif(length(yp1),-1e-6,1e-6)
        indcv[folds[[t]]] = rep(t,nrow(testData))
        taucv[,t] = tau
      }
      output <- AUPECcv2(Tcv, taucv, Ycv,indcv)
      aupec = output$aupec
      sdt = output$sd
      sdt
      return(list(aupec+1.96*sdt, aupec, aupec-1.96*sdt, sdt))
    }
    toc()
    PAPEu=unlist(expresult)[c(TRUE,FALSE,FALSE,FALSE)]
    PAPEm=unlist(expresult)[c(FALSE,TRUE,FALSE,FALSE)]
    PAPEl=unlist(expresult)[c(FALSE,FALSE,TRUE,FALSE)]
    sdm=unlist(expresult)[c(FALSE,FALSE,FALSE,TRUE)]
    coverage=sum(as.numeric(PAPEavg<PAPEu & PAPEavg>PAPEl))/ntrials
    rmse = sqrt(mean((PAPEm-PAPEavg)*(PAPEm-PAPEavg)))
    mad = mean(abs(PAPEm-PAPEavg))
    asd=sd(PAPEm)
    rmsesd= sqrt(mean((sdm-asd)*(sdm-asd)))
    madsd = mean(abs(sdm-asd))
    final_output3[m,1]=magnitude*10
    final_output3[m,2]=nseq[k]
    final_output3[m,3]=coverage
    final_output3[m,4]=PAPEavg
    final_output3[m,5]=mean(PAPEm)-PAPEavg
    final_output3[m,6]=asd
    final_output3[m,7]=asd - mean(sdm)
    final_output3[m,8]=mad
    final_output3[m,9]=rmsesd
    final_output3[m,10]=madsd
    m = m+1
  }
}
stopCluster(cl)

